res=GET('https://developer.nps.gov/api/v1/parks?limit=500&api_key=B9nDpbkbrb3kSOjz6kXSxMJ3d6MSpUvt1QqYdeyn')

data = res %>% content("text") %>% jsonlite::fromJSON() %>% as_tibble()

NPS_data=data %>% unnest(data) %>% select(fullName,latitude,longitude,topics, activities,states, parkCode) %>%  janitor::clean_names() %>% 
  mutate(
    latitude = as.numeric(latitude), 
    longitude = as.numeric(longitude)
  ) %>% unnest(activities, names_sep = "_") %>% 
  unnest(topics, names_sep = "_")
visitation_data <- 
  read_csv("data/Query Builder for Public Use Statistics (1979 - Last Calendar Year).csv") %>% 
  janitor::clean_names() %>% 
  mutate(unit_code = tolower(unit_code)) %>% 
  rename(park_code = unit_code, 
         full_name = park_name) %>% 
  select(full_name, park_code, park_type, region, state, year, month, recreation_visits, tent_campers, rv_campers, tent_campers, backcountry)
## Rows: 101419 Columns: 35
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (10): ParkName, UnitCode, ParkType, Region, State, ParkNameTotal, UnitCo...
## dbl  (3): Year, Month, YearTotal
## num (22): RecreationVisits, NonRecreationVisits, RecreationHours, NonRecreat...
## 
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
combined_data <- full_join(NPS_data, visitation_data, by = c("park_code"))
## Warning in full_join(NPS_data, visitation_data, by = c("park_code")): Detected an unexpected many-to-many relationship between `x` and `y`.
## ℹ Row 1 of `x` matches multiple rows in `y`.
## ℹ Row 1 of `y` matches multiple rows in `x`.
## ℹ If a many-to-many relationship is expected, set `relationship =
##   "many-to-many"` to silence this warning.

faceted plot with rv, tent, and backcountry visits

visitation_data %>% 
  group_by(park_type, month) %>% 
  summarize(total_visitation = sum(recreation_visits)) %>% 
plot_ly(x = ~as.factor(month), y = ~total_visitation, type = "scatter", mode = "lines", color = ~park_type)
## `summarise()` has grouped output by 'park_type'. You can override using the
## `.groups` argument.
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
visit_summary <- 
  visitation_data %>% 
  group_by(park_type, month) %>% 
  summarize(total_tent = sum(tent_campers), 
            total_backcountry = sum(backcountry), 
            total_rv = sum(rv_campers))
## `summarise()` has grouped output by 'park_type'. You can override using the
## `.groups` argument.
tent_campers_plot <- plot_ly(visit_summary, 
                             x = ~as.factor(month), 
                             y = ~total_tent, 
                             type = "scatter", 
                             mode = "lines", 
                             color = ~park_type,
                             legendgroup = ~park_type) %>%
  layout(title = "Tent Campers")

backcountry_plot <- plot_ly(visit_summary, 
                            x = ~as.factor(month), 
                            y = ~total_backcountry, 
                            type = "scatter",
                            mode = "lines",
                            color = ~park_type,
                           legendgroup = ~park_type,
                           showlegend = FALSE) %>%
  layout(title = "Backcountry")

rv_campers_plot <- plot_ly(visit_summary, 
                           x = ~as.factor(month), 
                           y = ~total_rv, 
                           type = "scatter", 
                           mode = "lines",
                           color = ~park_type,
                          legendgroup = ~park_type,
                          showlegend = FALSE) %>%
  layout(title = "RV Campers")

subplot(
  tent_campers_plot, 
  backcountry_plot, 
  rv_campers_plot, 
  nrows = 2,  
  shareX = TRUE, 
  shareY = TRUE   
)
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

visits by season boxplot

visitation_data %>% 
  mutate(season = case_when(
    month %in% c(12,1,2) ~ "Winter", 
    month %in% c(3,4,5) ~ "Spring", 
    month %in% c(6,7,8) ~ "Summer", 
    TRUE ~ "Fall"
  )) %>% 
  plot_ly(x = ~season, y = ~recreation_visits, type = "box")

total visitation by year

visitation_data %>% 
  group_by(park_type, year) %>% 
  summarize(total_visitation = sum(recreation_visits)) %>% 
  ggplot(aes(x = as.factor(year), y = total_visitation, color = park_type, group = park_type)) + 
  geom_line(linewidth = 0.7) +  theme(axis.text.x = element_text(angle = 90, vjust = 0.5, hjust = 1))
## `summarise()` has grouped output by 'park_type'. You can override using the
## `.groups` argument.

total visitation by season

visitation_data %>% 
  mutate(season = case_when(
    month %in% c(12,1,2) ~ "Winter", 
    month %in% c(3,4,5) ~ "Spring", 
    month %in% c(6,7,8) ~ "Summer", 
    TRUE ~ "Fall"
  )) %>% 
  group_by(season) %>% 
  summarize(total_tent = sum(tent_campers), 
            backcountry_visits = sum(backcountry), 
            total_rv = sum(rv_campers)) %>% 
  pivot_longer(
    total_tent:total_rv, 
    values_to = "total_visit", 
    names_to = "type_visit",
    names_prefix = "total_"
  ) %>% 
  plot_ly(x = ~season, y = ~total_visit, color = ~type_visit, type = "bar")

mean visitation by park type and season

visitation_data %>% 
  mutate(season = case_when(
    month %in% c(12,1,2) ~ "Winter", 
    month %in% c(3,4,5) ~ "Spring", 
    month %in% c(6,7,8) ~ "Summer", 
    TRUE ~ "Fall"
  )) %>% 
  group_by(park_type, season) %>% 
  mutate(
    mean_total = mean(recreation_visits, na.rm = TRUE)
  ) %>% 
    plot_ly(x = ~season, y = ~mean_total, color = ~park_type)
## No trace type specified:
##   Based on info supplied, a 'bar' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#bar
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors
## Warning in RColorBrewer::brewer.pal(N, "Set2"): n too large, allowed maximum for palette Set2 is 8
## Returning the palette you asked for with that many colors

mean visitation season and visit type

visitation_data %>% 
  mutate(season = case_when(
    month %in% c(12,1,2) ~ "Winter", 
    month %in% c(3,4,5) ~ "Spring", 
    month %in% c(6,7,8) ~ "Summer", 
    TRUE ~ "Fall"
  )) %>% 
  group_by(season) %>% 
  summarize(mean_tent = mean(tent_campers), 
            mean_backcountry = mean(backcountry), 
            mean_rv = mean(rv_campers)) %>% 
  pivot_longer(
    mean_tent:mean_rv, 
    names_to = "type_visit", 
    values_to = "mean", 
    names_prefix = "mean_"
  ) %>% 
  plot_ly(x = ~season, y = ~mean, color = ~type_visit)
## No trace type specified:
##   Based on info supplied, a 'bar' trace seems appropriate.
##   Read more about this trace type -> https://plotly.com/r/reference/#bar

map of all national parks in US

leaflet(data = NPS_data) %>% 
  addTiles() %>% 
  addMarkers(~longitude, ~latitude) %>%
  setView(lng = -98.583, lat = 39.828, zoom = 4)

Mean recreation visit by park type (separate plots for each park type)

park_types <- unique(visitation_data$park_type)
plots <- list()
for (i in seq_along(park_types)) {
plots[[i]] <- visitation_data %>% 
    filter(park_type == park_types[i]) %>% 
    mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>%
    summarize(avg_visits = mean(recreation_visits, na.rm = TRUE)) %>%
    ggplot(aes(x = season, y = avg_visits, fill = season)) +
    geom_col() +
    ggtitle(paste("Average Visits by Season for", park_types[i])) 
} 
plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

National Preserves has pretty similar avg visits across seasons

combined_data %>% 
  filter(park_type == "National Preserve") %>% 
  group_by(activities_name) %>% distinct(activities_name) %>% knitr::kable()
activities_name
Arts and Culture
Cultural Demonstrations
Auto and ATV
ATV Off-Roading
Scenic Driving
Astronomy
Stargazing
Biking
Road Biking
Boating
Boat Tour
Camping
Backcountry Camping
Car or Front Country Camping
Group Camping
RV Camping
Food
Picnicking
Guided Tours
Bus/Shuttle Guided Tour
Hiking
Backcountry Hiking
Front-Country Hiking
Hunting and Gathering
Hunting
Paddling
Canoeing
Kayaking
Junior Ranger Program
Wildlife Watching
Birdwatching
Park Film
Museum Exhibits
Shopping
Bookstore and Park Store
Canoe or Kayak Camping
Fishing
Hands-On
Citizen Science
Gift Shop and Souvenirs
Mountain Biking
Climbing
Rock Climbing
Freshwater Fishing
Fly Fishing
Horse Trekking
Horseback Riding
Swimming
Freshwater Swimming
Auto Off-Roading
Horse Camping (see also Horse/Stock Use)
Off-Trail Permitted Hiking
Horse Camping (see also camping)
Self-Guided Tours - Walking
Living History
Motorized Boating
Self-Guided Tours - Auto
Historic Weapons Demonstration
Stand Up Paddleboarding
Saltwater Swimming
Skiing
Cross-Country Skiing
Snowshoeing

National Preserves has skiing, cross country skiing and snowshoeing, which might be the reason why there is a higher average visit in winter than any other park type.

Average visitation by type and park_type (separate graph for each park_type)

park_types <- unique(visitation_data$park_type)
plots <- list()

for (i in seq_along(park_types)) {
  visit_summary <- visitation_data %>% 
    filter(park_type == park_types[i]) %>% 
    mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>% 
    summarize(
      mean_tent = mean(tent_campers, na.rm = TRUE), 
      mean_backcountry = mean(backcountry, na.rm = TRUE), 
      mean_rv = mean(rv_campers, na.rm = TRUE)
    ) %>% 
    pivot_longer(
      cols = starts_with("mean_"), 
      names_to = "type_visit", 
      values_to = "mean", 
      names_prefix = "mean_"
    )
  

  plots[[i]] <- ggplot(visit_summary, aes(x = season, y = mean)) + 
    geom_col() + facet_grid(~type_visit) +
    ggtitle(paste("Average Visits by Season for", park_types[i]))
}

plots
## [[1]]

## 
## [[2]]

## 
## [[3]]

## 
## [[4]]

## 
## [[5]]

## 
## [[6]]

## 
## [[7]]

## 
## [[8]]

## 
## [[9]]

## 
## [[10]]

## 
## [[11]]

## 
## [[12]]

## 
## [[13]]

## 
## [[14]]

## 
## [[15]]

## 
## [[16]]

## 
## [[17]]

## 
## [[18]]

Regional Comparisons

region_data <- combined_data %>% 
  mutate(region = case_when(
    states %in% c("CT", "RI", "NH", "VT", "NJ", "NY", "PA", "MD", "ME", "MA") ~ "northeast", 
    states %in% c("IL","IN", "MI", "OH", "WI", "IA", "KS", "MN", "MO", "NE", "ND", "SD") ~ "midwest", 
    states %in% c("FL", "GA", "NC", "SC", "VA", "DE", "WV", "AL", "KY", "MS", "TN", "AR", "LA", "OK", "TX", "DC") ~ "south", 
    states %in% c("AK", "CA", "HI", "OR", "WA") ~ "west"
  ))

Northeast

region_data %>%
  filter(region == "northeast") %>% drop_na() %>% 
  distinct(park_type) %>% knitr::kable()
park_type
National Park
National Historical Park
National Monument
National Historic Site
National Battlefield
National Seashore
Park (Other)
National Memorial
National Military Park
International Historic Site
region_data %>%
  filter(region == "northeast") %>% distinct(activities_name) 
## # A tibble: 80 × 1
##    activities_name        
##    <chr>                  
##  1 Arts and Culture       
##  2 Cultural Demonstrations
##  3 Astronomy              
##  4 Stargazing             
##  5 Biking                 
##  6 Boating                
##  7 Camping                
##  8 Group Camping          
##  9 Climbing               
## 10 Rock Climbing          
## # ℹ 70 more rows
region_data %>%
  filter(region == "northeast") %>% 
  mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>% 
    summarize(
      mean_tent = mean(tent_campers, na.rm = TRUE), 
      mean_backcountry = mean(backcountry, na.rm = TRUE), 
      mean_rv = mean(rv_campers, na.rm = TRUE)
    ) %>% 
    pivot_longer(
      cols = starts_with("mean_"), 
      names_to = "type_visit", 
      values_to = "mean", 
      names_prefix = "mean_"
    ) %>% ggplot(aes(x = season, y = mean)) + geom_col() +  facet_grid(~type_visit)

Midwest

region_data %>%
  filter(region == "midwest") %>%
  drop_na() %>% 
  distinct(park_type) %>% knitr::kable()
park_type
National Monument
National Lakeshore
National Park
National Historical Park
National Historic Site
National Memorial
National River
National Wild & Scenic River
National Battlefield Park
National Preserve
National Battlefield
region_data %>%
  filter(region == "midwest") %>% distinct(activities_name) 
## # A tibble: 83 × 1
##    activities_name            
##    <chr>                      
##  1 Arts and Culture           
##  2 Cultural Demonstrations    
##  3 Astronomy                  
##  4 Stargazing                 
##  5 Food                       
##  6 Picnicking                 
##  7 Guided Tours               
##  8 Self-Guided Tours - Walking
##  9 Hiking                     
## 10 Junior Ranger Program      
## # ℹ 73 more rows
region_data %>%
  filter(region == "midwest") %>% 
  mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>% 
    summarize(
      mean_tent = mean(tent_campers, na.rm = TRUE), 
      mean_backcountry = mean(backcountry, na.rm = TRUE), 
      mean_rv = mean(rv_campers, na.rm = TRUE)
    ) %>% 
    pivot_longer(
      cols = starts_with("mean_"), 
      names_to = "type_visit", 
      values_to = "mean", 
      names_prefix = "mean_"
    ) %>% ggplot(aes(x = season, y = mean)) + geom_col() +  facet_grid(~type_visit)

South

region_data %>%
  filter(region == "south") %>%
  drop_na() %>% 
  distinct(park_type) %>% 
  knitr::kable()
park_type
National Historical Park
National Monument
National Recreation Area
National Historic Site
National Memorial
National Park
National Preserve
National Wild & Scenic River
National River
National Seashore
National Battlefield
National Military Park
National Battlefield Park
Park (Other)
region_data %>%
  filter(region == "south") %>%
  distinct(activities_name) 
## # A tibble: 89 × 1
##    activities_name            
##    <chr>                      
##  1 Astronomy                  
##  2 Stargazing                 
##  3 Food                       
##  4 Picnicking                 
##  5 Guided Tours               
##  6 Self-Guided Tours - Walking
##  7 Hands-On                   
##  8 Junior Ranger Program      
##  9 Wildlife Watching          
## 10 Birdwatching               
## # ℹ 79 more rows
region_data %>%
  filter(region == "south") %>% 
  mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>% 
    summarize(
      mean_tent = mean(tent_campers, na.rm = TRUE), 
      mean_backcountry = mean(backcountry, na.rm = TRUE), 
      mean_rv = mean(rv_campers, na.rm = TRUE)
    ) %>% 
    pivot_longer(
      cols = starts_with("mean_"), 
      names_to = "type_visit", 
      values_to = "mean", 
      names_prefix = "mean_"
    ) %>% ggplot(aes(x = season, y = mean)) + geom_col() +  facet_grid(~type_visit)

west

region_data %>% filter(region == "west") %>% drop_na() %>% 
  distinct(park_type) %>% knitr::kable()
park_type
National Monument
National Park
National Historic Site
National Recreation Area
National Historical Park
National Preserve
National Seashore
National Memorial
region_data %>% filter(region == "west") %>% distinct(activities_name) 
## # A tibble: 99 × 1
##    activities_name        
##    <chr>                  
##  1 Arts and Culture       
##  2 Cultural Demonstrations
##  3 Auto and ATV           
##  4 Scenic Driving         
##  5 Biking                 
##  6 Mountain Biking        
##  7 Road Biking            
##  8 Boating                
##  9 Motorized Boating      
## 10 Sailing                
## # ℹ 89 more rows
region_data %>% 
  filter(region == "west") %>% 
  mutate(season = case_when(
      month %in% c(12, 1, 2) ~ "Winter", 
      month %in% c(3, 4, 5) ~ "Spring", 
      month %in% c(6, 7, 8) ~ "Summer", 
      TRUE ~ "Fall"
    )) %>% 
    group_by(season) %>% 
    summarize(
      mean_tent = mean(tent_campers, na.rm = TRUE), 
      mean_backcountry = mean(backcountry, na.rm = TRUE), 
      mean_rv = mean(rv_campers, na.rm = TRUE)
    ) %>% 
    pivot_longer(
      cols = starts_with("mean_"), 
      names_to = "type_visit", 
      values_to = "mean", 
      names_prefix = "mean_"
    ) %>% ggplot(aes(x = season, y = mean)) + geom_col() +  facet_grid(~type_visit)

Comparing Regions

region_data %>% group_by(region) %>% 
  summarize(avg_visitation = mean(recreation_visits, na.rm = TRUE))
## # A tibble: 5 × 2
##   region    avg_visitation
##   <chr>              <dbl>
## 1 midwest           47256.
## 2 northeast         61293.
## 3 south             57988.
## 4 west             194391.
## 5 <NA>             139437.